{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {},
  "cells": [
    {
      "metadata": {},
      "source": [
        "<td>\n",
        "   <a target=\"_blank\" href=\"https://labelbox.com\" ><img src=\"https://labelbox.com/blog/content/images/2021/02/logo-v4.svg\" width=256/></a>\n",
        "</td>\n",
        "\n"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "\n",
        "<td>\n",
        "\n",
        "<a href=\"https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/annotation_import/conversational_LLM.ipynb\" target=\"_blank\"><img\n",
        "src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
        "</td>\n",
        "\n",
        "<td>\n",
        "\n",
        "<a href=\"https://github.com/Labelbox/labelbox-python/tree/master/examples/annotation_import/conversational_LLM.ipynb\" target=\"_blank\"><img\n",
        "src=\"https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white\" alt=\"GitHub\"></a>\n",
        "</td>"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "# LLM pairwise comparison with Conversational text using MAL and Ground truth\n",
        "This demo is meant to showcase how to upload conversational row data that contains model outputs for pairwise comparisons analysis.\n"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "!pip install -q \"labelbox[data]\""
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "# Set up"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "import labelbox as lb\n",
        "import labelbox.types as lb_types\n",
        "import uuid"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "# Replace with your API key"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "API_KEY = \"\"\n",
        "client = lb.Client(api_key=API_KEY)"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "# Supported annotations for conversational text"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "### Entity "
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "ner_annotation = lb_types.ObjectAnnotation(\n",
        "    name=\"ner\",\n",
        "    value=lb_types.ConversationEntity(\n",
        "        start=0,\n",
        "        end=8,\n",
        "        message_id=\"message-1\"\n",
        "    )\n",
        ")\n",
        "\n",
        "ner_annotation_ndjson = {\n",
        "        \"name\": \"ner\",\n",
        "        \"location\": {\n",
        "            \"start\": 0,\n",
        "            \"end\": 8\n",
        "        },\n",
        "        \"messageId\": \"message-1\"\n",
        "    }"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "### Classification: Radio (single-choice)"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "radio_annotation = lb_types.ClassificationAnnotation(\n",
        "    name=\"Choose the best response\",\n",
        "    value=lb_types.Radio(answer=lb_types.ClassificationAnswer(\n",
        "        name=\"Response B\")))\n",
        "\n",
        "\n",
        "\n",
        "radio_annotation_ndjson = {\n",
        "    \"name\": \"Choose the best response\",\n",
        "    \"answer\": {\n",
        "      \"name\": \"Response B\"\n",
        "    }\n",
        "}\n"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "### Classification: Free-form text"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "text_annotation = lb_types.ClassificationAnnotation(\n",
        "    name=\"Provide a reason for your choice\",\n",
        "    value=lb_types.Text(answer=\"the answer to the text questions right here\")\n",
        ")\n",
        "\n",
        "\n",
        "text_annotation_ndjson = {\n",
        "    \"name\": \"Provide a reason for your choice\",\n",
        "    \"answer\": \"This is the more concise answer\"\n",
        "\n",
        "}"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "### Classification: Checklist (multi-choice)"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "checklist_annotation= lb_types.ClassificationAnnotation(\n",
        "  name=\"checklist_convo\", # must match your ontology feature\"s name\n",
        "  value=lb_types.Checklist(\n",
        "      answer = [\n",
        "        lb_types.ClassificationAnswer(\n",
        "            name = \"first_checklist_answer\"\n",
        "        ),\n",
        "        lb_types.ClassificationAnswer(\n",
        "            name = \"second_checklist_answer\"\n",
        "        )\n",
        "      ]\n",
        "    ),\n",
        "  message_id=\"message-1\" # Message specific annotation\n",
        " )\n",
        "\n",
        "\n",
        "checklist_annotation_ndjson = {\n",
        "    \"name\": \"checklist_convo\",\n",
        "    \"answers\": [\n",
        "        {\"name\": \"first_checklist_answer\"},\n",
        "        {\"name\": \"second_checklist_answer\"}\n",
        "    ],\n",
        "    \"messageId\": \"message-1\"\n",
        "}"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "### Classification: Nested radio and checklist"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "\n",
        "# Message based\n",
        "nested_checklist_annotation = lb_types.ClassificationAnnotation(\n",
        "  name=\"nested_checklist_question\",\n",
        "  message_id=\"message-1\",\n",
        "  value=lb_types.Checklist(\n",
        "    answer=[lb_types.ClassificationAnswer(\n",
        "      name=\"first_checklist_answer\",\n",
        "      classifications=[\n",
        "        lb_types.ClassificationAnnotation(\n",
        "          name=\"sub_checklist_question\",\n",
        "          value=lb_types.Checklist(\n",
        "            answer=[lb_types.ClassificationAnswer(\n",
        "            name=\"first_sub_checklist_answer\"\n",
        "          )]\n",
        "        ))\n",
        "      ]\n",
        "    )]\n",
        "  )\n",
        ")\n",
        "# Message based\n",
        "nested_checklist_annotation_ndjson = {\n",
        "  \"name\": \"nested_checklist_question\",\n",
        "  \"messageId\": \"message-1\",\n",
        "  \"answer\": [{\n",
        "      \"name\": \"first_checklist_answer\",\n",
        "      \"classifications\" : [\n",
        "        {\n",
        "          \"name\": \"sub_checklist_question\",\n",
        "          \"answer\": {\n",
        "            \"name\": \"first_sub_checklist_answer\",\n",
        "          }\n",
        "        }\n",
        "      ]\n",
        "  }]\n",
        "}\n",
        "# Global\n",
        "nested_radio_annotation = lb_types.ClassificationAnnotation(\n",
        "  name=\"nested_radio_question\",\n",
        "  value=lb_types.Radio(\n",
        "    answer=lb_types.ClassificationAnswer(\n",
        "      name=\"first_radio_answer\",\n",
        "      classifications=[\n",
        "        lb_types.ClassificationAnnotation(\n",
        "          name=\"sub_radio_question\",\n",
        "          value=lb_types.Radio(\n",
        "            answer=lb_types.ClassificationAnswer(\n",
        "              name=\"first_sub_radio_answer\"\n",
        "            )\n",
        "          )\n",
        "        )\n",
        "      ]\n",
        "    )\n",
        "  )\n",
        ")\n",
        "#Global\n",
        "nested_radio_annotation_ndjson = {\n",
        "  \"name\": \"nested_radio_question\",\n",
        "  \"answer\": {\n",
        "      \"name\": \"first_radio_answer\",\n",
        "      \"classifications\": [{\n",
        "          \"name\":\"sub_radio_question\",\n",
        "          \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n",
        "        }]\n",
        "    }\n",
        "}\n",
        "\n"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "## Step 1: Import data rows with \"modelOutputs\" into Catalog"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "In addition to your message based data, you will need to add a list of model outputs to your JSON file:\n",
        "\n",
        "```\n",
        "\"modelOutputs\" : [\n",
        "  {\n",
        "      \"title\": \"Name of the response option\",\n",
        "      \"content\": \"Content of the response\",\n",
        "      \"modelConfigName\": \"Name of model configuration\"\n",
        "  }\n",
        "]\n",
        "```\n"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "#### Example of row_data with model outputs"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "pairwise_shopping_2 =  \"\"\"\n",
        " {\n",
        "  \"type\": \"application/vnd.labelbox.conversational\",\n",
        "  \"version\": 1,\n",
        "  \"messages\": [\n",
        "    {\n",
        "      \"messageId\": \"message-0\",\n",
        "      \"timestampUsec\": 1530718491,\n",
        "      \"content\": \"Hi! How can I help?\",\n",
        "      \"user\": {\n",
        "        \"userId\": \"Bot 002\",\n",
        "        \"name\": \"Bot\"\n",
        "      },\n",
        "      \"align\": \"left\",\n",
        "      \"canLabel\": false\n",
        "    },\n",
        "    {\n",
        "      \"messageId\": \"message-1\",\n",
        "      \"timestampUsec\": 1530718503,\n",
        "      \"content\": \"I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!\",\n",
        "      \"user\": {\n",
        "        \"userId\": \"User 00686\",\n",
        "        \"name\": \"User\"\n",
        "      },\n",
        "      \"align\": \"right\",\n",
        "      \"canLabel\": true\n",
        "    }\n",
        "\n",
        "  ],\n",
        "  \"modelOutputs\": [\n",
        "    {\n",
        "      \"title\": \"Response A\",\n",
        "      \"content\": \"I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\\n\\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\\n\\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.\",\n",
        "      \"modelConfigName\": \"GPT-3.5 with temperature 0\"\n",
        "    },\n",
        "    {\n",
        "      \"title\": \"Response B\",\n",
        "      \"content\": \"I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!\",\n",
        "      \"modelConfigName\": \"Fine Tuned GPT-3.5 with demo data\"\n",
        "    }\n",
        "  ]\n",
        "}\n",
        "\"\"\""
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "global_key = \"pairwise_shooping_asset\"\n",
        "\n",
        "# Upload data rows\n",
        "convo_data = {\n",
        "    \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/conversational-sample-data/pairwise_shopping_2.json\",\n",
        "    \"global_key\": global_key\n",
        "}\n",
        "\n",
        "# Create a dataset\n",
        "dataset = client.create_dataset(name=\"pairwise_annotation_demo\")\n",
        "# Create a datarows\n",
        "task = dataset.create_data_rows([convo_data])\n",
        "task.wait_till_done()\n",
        "print(\"Errors:\",task.errors)\n",
        "print(\"Failed data rows:\", task.failed_data_rows)\n"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "## Step 2: Create/select an Ontology"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "# Create an ontology with relevant classifications\n",
        "\n",
        "ontology_builder = lb.OntologyBuilder(\n",
        "  tools=[\n",
        "    lb.Tool(tool=lb.Tool.Type.NER,name=\"ner\"),\n",
        "  ],\n",
        "  classifications=[\n",
        "    lb.Classification(\n",
        "      class_type=lb.Classification.Type.RADIO,\n",
        "      scope=lb.Classification.Scope.GLOBAL,\n",
        "      name=\"Choose the best response\",\n",
        "      options=[lb.Option(value=\"Response A\"), lb.Option(value=\"Response B\"), lb.Option(value=\"Tie\")]\n",
        "    ),\n",
        "    lb.Classification(\n",
        "      class_type=lb.Classification.Type.TEXT,\n",
        "      name=\"Provide a reason for your choice\"\n",
        "    ),\n",
        "    lb.Classification(\n",
        "      class_type=lb.Classification.Type.CHECKLIST,\n",
        "      scope=lb.Classification.Scope.INDEX,\n",
        "      name=\"checklist_convo\",\n",
        "      options=[\n",
        "        lb.Option(value=\"first_checklist_answer\"),\n",
        "        lb.Option(value=\"second_checklist_answer\")\n",
        "      ]\n",
        "    ),\n",
        "    lb.Classification(\n",
        "      class_type=lb.Classification.Type.CHECKLIST,\n",
        "      name=\"nested_checklist_question\",\n",
        "      scope = lb.Classification.Scope.INDEX,\n",
        "      options=[\n",
        "          lb.Option(\"first_checklist_answer\",\n",
        "            options=[\n",
        "              lb.Classification(\n",
        "                  class_type=lb.Classification.Type.CHECKLIST,\n",
        "                  name=\"sub_checklist_question\",\n",
        "                  options=[lb.Option(\"first_sub_checklist_answer\")]\n",
        "              )\n",
        "          ])\n",
        "      ]\n",
        "    ),\n",
        "    lb.Classification(\n",
        "        class_type=lb.Classification.Type.RADIO,\n",
        "        name=\"nested_radio_question\",\n",
        "        scope = lb.Classification.Scope.GLOBAL,\n",
        "        options=[\n",
        "            lb.Option(\"first_radio_answer\",\n",
        "                options=[\n",
        "                    lb.Classification(\n",
        "                        class_type=lb.Classification.Type.RADIO,\n",
        "                        name=\"sub_radio_question\",\n",
        "                        options=[lb.Option(\"first_sub_radio_answer\")]\n",
        "                    )\n",
        "                ])\n",
        "          ]\n",
        "    )\n",
        "  ]\n",
        ")\n",
        "\n",
        "ontology = client.create_ontology(\"Pairwise comparison ontology\", ontology_builder.asdict(), media_type=lb.MediaType.Conversational)\n",
        "\n"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "## Step 3: Create a labeling project"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "# Create Labelbox project\n",
        "project = client.create_project(name=\"Conversational Text Annotation Import Demo (Pairwise comparison)\",\n",
        "                                    media_type=lb.MediaType.Conversational)\n",
        "\n",
        "# Setup your ontology\n",
        "project.setup_editor(ontology) # Connect your ontology and editor to your project"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "## Step 4: Send a batch of data rows to the project"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "# Create a batch to send to your project\n",
        "batch = project.create_batch(\n",
        "  \"first-batch-convo-demo\", # Each batch in a project must have a unique name\n",
        "  global_keys=[global_key], # Paginated collection of data row objects, list of data row ids or global keys\n",
        "  priority=5 # priority between 1(Highest) - 5(lowest)\n",
        ")\n",
        "\n",
        "print(\"Batch: \", batch)"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "## Step 5: Create the annotations payload"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "Python annotation"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "label = []\n",
        "label.append(\n",
        "  lb_types.Label(\n",
        "    data=lb_types.ConversationData(\n",
        "      global_key=global_key\n",
        "    ),\n",
        "    annotations=[\n",
        "      ner_annotation,\n",
        "      text_annotation,\n",
        "      checklist_annotation,\n",
        "      radio_annotation,\n",
        "      nested_radio_annotation,\n",
        "      nested_checklist_annotation\n",
        "    ]\n",
        "  )\n",
        ")"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "NDJSON annotation"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "label_ndjson = []\n",
        "for annotations in [\n",
        "    ner_annotation_ndjson,\n",
        "    text_annotation_ndjson,\n",
        "    checklist_annotation_ndjson,\n",
        "    radio_annotation_ndjson,\n",
        "    nested_checklist_annotation_ndjson,\n",
        "    nested_radio_annotation_ndjson\n",
        "    ]:\n",
        "  annotations.update({\n",
        "      \"dataRow\": {\n",
        "          \"globalKey\": global_key\n",
        "      }\n",
        "  })\n",
        "  label_ndjson.append(annotations)"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "## Step 6: Upload annotations to a project as pre-labels or complete labels "
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "### Model Assisted Labeling (MAL)"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "upload_job = lb.MALPredictionImport.create_from_objects(\n",
        "    client = client,\n",
        "    project_id = project.uid,\n",
        "    name=f\"mal_job-{str(uuid.uuid4())}\",\n",
        "    predictions=label)\n",
        "\n",
        "upload_job.wait_until_done()\n",
        "print(\"Errors:\", upload_job.errors)\n",
        "print(\"Status of uploads: \", upload_job.statuses)"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    },
    {
      "metadata": {},
      "source": [
        "## Label Import"
      ],
      "cell_type": "markdown"
    },
    {
      "metadata": {},
      "source": [
        "upload_job = lb.LabelImport.create_from_objects(\n",
        "    client = client,\n",
        "    project_id = project.uid,\n",
        "    name=\"label_import_job\"+str(uuid.uuid4()),\n",
        "    labels=label)\n",
        "\n",
        "upload_job.wait_until_done();\n",
        "print(\"Errors:\", upload_job.errors)\n",
        "print(\"Status of uploads: \", upload_job.statuses)"
      ],
      "cell_type": "code",
      "outputs": [],
      "execution_count": null
    }
  ]
}